Intro

In this script, I fit simplified models to density data so that I can predict densities on the condition data and pred grid

Load libraries

rm(list = ls())

library(tidyverse); theme_set(theme_light(base_size = 12))
library(readxl)
library(tidylog)
library(RCurl)
library(viridis)
library(RColorBrewer)
library(patchwork)
library(janitor)
library(icesDatras)
library(mapdata)
library(patchwork)
library(rgdal)
library(raster)
library(sf)
library(rgeos)
library(chron)
library(lattice)
library(ncdf4)
library(marmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(geosphere)
#remotes::install_github("pbs-assess/sdmTMB")
library(sdmTMB)
# To load entire cache in interactive r session, do:
# qwraps2::lazyload_cache_dir(path = "R/clean_data/cod_fle_density_models_as_covars_cache/html")

For maps

world <- ne_countries(scale = "medium", returnclass = "sf")

# Specify map ranges
ymin = 54; ymax = 58; xmin = 12; xmax = 22

map_data <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf", continent = "europe")

# Crop the polygon for plotting and efficiency:
# st_bbox(map_data) # find the rough coordinates
swe_coast <- suppressWarnings(suppressMessages(
  st_crop(map_data,
          c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))))

# Transform our map into UTM 33 coordinates, which is the equal-area projection we fit in:
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)

#ggplot(swe_coast_proj) + geom_sf() 

Load data

d <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/mdat_cpue.csv")

# Filter to match the data I want to predict on
d <- d %>% filter(year > 1992 & year < 2020)

# Calculate standardized variables
d <- d %>% 
  mutate(depth_sc = (depth - mean(depth))/sd(depth)) %>%
  mutate(year = as.integer(year)) %>% 
  drop_na(depth) %>% 
  rename("density_cod" = "density") # to fit better with how flounder is named

# Explore data a bit
ggplot(d) + 
  geom_point(aes(year, density_cod, color = factor(sub_div))) + 
  facet_wrap(~sub_div)


# Explore data a bit
ggplot(d) + 
  geom_point(aes(year, density_fle, color = factor(sub_div))) + 
  facet_wrap(~sub_div)

Make mesh

# Make mesh:
spde <- make_mesh(d, xy_cols = c("X", "Y"),
                  n_knots = 150, 
                  type = "kmeans", seed = 42)

plot(spde)

Fit models

# Depth spline + oxy spline
# Cod model
mcod <- sdmTMB(density_cod ~ 0 + as.factor(year) + s(depth_sc),
               data = d, mesh = spde, family = tweedie(link = "log"),
               spatiotemporal = "AR1", spatial = "on", time = "year",
               reml = FALSE, control = sdmTMBcontrol(newton_steps = 1))
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k` since the degree of wiggliness
#> is determined by the data.

tidy(mcod, conf.int = TRUE)
#>                   term estimate std.error conf.low conf.high
#> 1  as.factor(year)1993 4.454140 0.4253550 3.620459  5.287820
#> 2  as.factor(year)1994 4.793433 0.4023658 4.004810  5.582055
#> 3  as.factor(year)1995 5.079760 0.3998733 4.296023  5.863497
#> 4  as.factor(year)1996 5.145407 0.4004867 4.360468  5.930346
#> 5  as.factor(year)1997 4.010807 0.3998520 3.227111  4.794502
#> 6  as.factor(year)1998 4.280269 0.3978222 3.500552  5.059987
#> 7  as.factor(year)1999 4.290515 0.3970853 3.512242  5.068788
#> 8  as.factor(year)2000 4.576831 0.3971113 3.798507  5.355154
#> 9  as.factor(year)2001 4.957168 0.3842108 4.204129  5.710208
#> 10 as.factor(year)2002 5.363282 0.3818835 4.614804  6.111760
#> 11 as.factor(year)2003 4.553489 0.4017928 3.765989  5.340988
#> 12 as.factor(year)2004 4.210470 0.3941435 3.437963  4.982977
#> 13 as.factor(year)2005 5.057329 0.3753104 4.321734  5.792923
#> 14 as.factor(year)2006 4.985293 0.3653520 4.269216  5.701369
#> 15 as.factor(year)2007 4.976487 0.3645525 4.261977  5.690997
#> 16 as.factor(year)2008 5.261581 0.3629398 4.550232  5.972930
#> 17 as.factor(year)2009 5.359733 0.3621358 4.649960  6.069507
#> 18 as.factor(year)2010 5.252602 0.3642944 4.538598  5.966606
#> 19 as.factor(year)2011 4.619708 0.3652426 3.903845  5.335570
#> 20 as.factor(year)2012 4.397305 0.3688655 3.674342  5.120268
#> 21 as.factor(year)2013 4.741467 0.3655587 4.024985  5.457949
#> 22 as.factor(year)2014 4.705363 0.3682657 3.983576  5.427150
#> 23 as.factor(year)2015 4.601450 0.3681050 3.879977  5.322922
#> 24 as.factor(year)2016 4.340151 0.3685396 3.617826  5.062475
#> 25 as.factor(year)2017 4.527776 0.3662835 3.809874  5.245679
#> 26 as.factor(year)2018 4.167567 0.3677799 3.446731  4.888402
#> 27 as.factor(year)2019 4.164741 0.3994959 3.381744  4.947739

# Check residuals of models
d$residualsmcod <- residuals(mcod)
qqnorm(d$residualsmcod); abline(a = 0, b = 1)


# Save model (so that I can add density predictions to the pred grid for condition)
saveRDS(mcod, "output/mcod.rds")
# Fit flounder model
mfle <- sdmTMB(density_fle ~ 0 + as.factor(year) + s(depth_sc),
               data = d, mesh = spde, family = tweedie(link = "log"),
               spatiotemporal = "AR1", spatial = "on", time = "year",
               reml = FALSE, control = sdmTMBcontrol(newton_steps = 1))
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k` since the degree of wiggliness
#> is determined by the data.

tidy(mfle, conf.int = TRUE)
#>                   term estimate std.error conf.low conf.high
#> 1  as.factor(year)1993 3.949785 0.3918821 3.181711  4.717860
#> 2  as.factor(year)1994 3.907088 0.3762462 3.169659  4.644517
#> 3  as.factor(year)1995 4.226126 0.3737753 3.493540  4.958712
#> 4  as.factor(year)1996 4.096355 0.3747133 3.361930  4.830779
#> 5  as.factor(year)1997 3.595180 0.3727111 2.864680  4.325680
#> 6  as.factor(year)1998 3.563202 0.3695285 2.838940  4.287465
#> 7  as.factor(year)1999 3.770240 0.3664361 3.052038  4.488442
#> 8  as.factor(year)2000 3.969409 0.3689576 3.246265  4.692552
#> 9  as.factor(year)2001 4.396938 0.3568249 3.697574  5.096302
#> 10 as.factor(year)2002 4.783574 0.3526219 4.092447  5.474700
#> 11 as.factor(year)2003 4.040726 0.3649964 3.325346  4.756106
#> 12 as.factor(year)2004 4.060109 0.3581775 3.358094  4.762124
#> 13 as.factor(year)2005 4.169854 0.3476480 3.488477  4.851232
#> 14 as.factor(year)2006 4.403522 0.3390383 3.739019  5.068025
#> 15 as.factor(year)2007 4.615532 0.3368432 3.955331  5.275732
#> 16 as.factor(year)2008 4.793969 0.3366126 4.134221  5.453718
#> 17 as.factor(year)2009 4.660473 0.3365410 4.000865  5.320082
#> 18 as.factor(year)2010 4.808711 0.3362852 4.149604  5.467818
#> 19 as.factor(year)2011 4.436690 0.3362290 3.777693  5.095687
#> 20 as.factor(year)2012 4.462359 0.3384255 3.799057  5.125660
#> 21 as.factor(year)2013 4.585104 0.3371744 3.924254  5.245953
#> 22 as.factor(year)2014 4.249861 0.3403799 3.582729  4.916994
#> 23 as.factor(year)2015 4.227479 0.3399941 3.561103  4.893855
#> 24 as.factor(year)2016 4.232719 0.3394271 3.567454  4.897984
#> 25 as.factor(year)2017 4.382421 0.3384405 3.719090  5.045752
#> 26 as.factor(year)2018 4.267328 0.3406859 3.599596  4.935060
#> 27 as.factor(year)2019 4.449725 0.3635740 3.737133  5.162317

# Check residuals of models
d$residualsmfle <- residuals(mfle)
qqnorm(d$residualsmfle); abline(a = 0, b = 1)


# Save model (so that I can add density predictions to the pred grid for condition)
saveRDS(mfle, "output/mfle.rds")

I want to explore the flounder model a little bit more before I trust it, because in contrast to cod, I haven’t done so yet

# Read in pred_grid2 which has oxygen values at location and time and depth:
pred_grid2 <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/pred_grid2.csv")
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   X = col_double(),
#>   Y = col_double(),
#>   depth = col_double(),
#>   year = col_double(),
#>   deep = col_character(),
#>   oxy = col_double(),
#>   temp = col_double(),
#>   lon = col_double(),
#>   lat = col_double(),
#>   biomass_saduria = col_double(),
#>   subdiv = col_character(),
#>   subdiv2 = col_character(),
#>   sub_div = col_double(),
#>   ices_rect = col_character()
#> )

# Standardize data with respect to prediction grid:
pred_grid2 <- pred_grid2 %>%
  mutate(year = as.integer(year)) %>% 
  filter(year %in% c(unique(d$year))) %>% 
  mutate(depth_sc = (depth - mean(d$depth))/sd(d$depth),
         temp_sc = (temp - mean(d$temp))/sd(d$temp),
         oxy_sc = (oxy - mean(d$oxy))/sd(d$oxy)) %>% # Need to scale these to the mean and sd in the data!
  drop_na(oxy, depth, temp)
#> mutate: converted 'year' from double to integer (0 new NA)
#> filter: no rows removed
#> mutate: new variable 'depth_sc' (double) with 6,429 unique values and 0% NA
#>         new variable 'temp_sc' (double) with one unique value and 100% NA
#>         new variable 'oxy_sc' (double) with 250,804 unique values and 2% NA
#> drop_na: removed 6,615 rows (3%), 249,453 rows remaining

# Predict on the pred grid, calculate index
preds_fle <- predict(mfle, newdata = pred_grid2, return_tmb_object = TRUE, area = 2*2)
index <- get_index(preds_fle, bias_correct = FALSE)
index <- index %>% mutate(est_t = est/1000, lwr_t = lwr/1000, upr_t = upr/1000)
#> mutate: new variable 'est_t' (double) with 27 unique values and 0% NA
#>         new variable 'lwr_t' (double) with 27 unique values and 0% NA
#>         new variable 'upr_t' (double) with 27 unique values and 0% NA

# Plot index
ggplot() +
  geom_line(data = index, aes(year, est_t, color = "blue")) +
  geom_ribbon(data = index, aes(year, ymin = lwr_t, ymax = upr_t, fill = "blue"), alpha = 0.4) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20)) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  guides(color = FALSE) + 
  labs(x = "Year", y = "Tonnes", fill = "Sub Divisions", label = "") +
  theme_light(base_size = 11) +
  theme(axis.text.x = element_text(angle = 30),
        legend.position = c(0.8, 0.8),
        legend.background = element_blank()) +
  NULL
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.


# Calculate an index that corresponds to the average so that I can compare it to the data
ncells <- filter(pred_grid2, year == max(pred_grid2$year)) %>% nrow()
#> filter: removed 240,214 rows (96%), 9,239 rows remaining
preds_fle_ave <- predict(mfle, newdata = pred_grid2, return_tmb_object = TRUE, area = 1/ncells)
index_ave <- get_index(preds_fle_ave, bias_correct = FALSE)

d_sum <- d %>%
  ungroup() %>%
  group_by(year) %>%
  summarise(mean_density_fle = mean(density_fle))
#> ungroup: no grouping variables
#> group_by: one grouping variable (year)
#> summarise: now 27 rows and 2 columns, ungrouped

ggplot(index_ave, aes(year, est, color = "prediction")) +
  geom_line() +
  geom_ribbon(aes(ymin = lwr, ymax = upr, fill = "prediction"), alpha = 0.1) +
  geom_point(data = d_sum, aes(year, mean_density_fle, color = "data", size = 1.1)) +
  geom_line(data = d_sum, aes(year, mean_density_fle, color = "data"), linetype = 2) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  guides(fill = FALSE) +
  labs(x = "Year", y = expression(paste("Density [kg/", km^2, "]", sep = ""))) +
  NULL
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.


# Plot predictions on map
ggplot(swe_coast_proj) +
  geom_raster(data = preds_fle$data, aes(x = X * 1000, y = Y * 1000, fill = exp(est))) +
  geom_sf(size = 0.3) +
  scale_fill_viridis_c(trans = "sqrt") +
  facet_wrap(~ year, ncol = 5) +
  labs(x = "Longitude", y = "Latitude", fill = expression(kg/km^2)) +
  ggtitle("Predicted density (fixed + random)")


## Interesting! Now predict these model on dat - i.e. the condition data so that I can use those as covariates